Density Plot
Density Plot with Outlier Highlight using GGplot2
density_plot_infection_risk = ggplot(senic_data) +
ggtitle("Density plot of Infection_Risk") +
geom_density(aes(x=Infection_Risk), fill = "lightblue") +
geom_point(data=senic_data[get_outliers(senic_data$Infection_Risk),],
aes(x=Infection_Risk, y=0, colour="Outliers"),
shape=23, size=2, fill="red") +
scale_color_manual(values = c("darkblue","black")) +
labs(colour="Legend") +
theme(plot.title = element_text(hjust = 0.5), legend.position = "right")
density_plot_infection_risk

Density Plot with Outlier Highlight using Plotly (converting from ggplot2)
x <- ggplotly(p=density_plot_infection_risk)
x
Histogram Plot
Histogram plot with Outlier Highlight using Plotly
outliers = senic_data[get_outliers(senic_data$Infection_Risk),c("Infection_Risk")]
senic_data$zero = 0
plot_ly(senic_data, x=~Infection_Risk) %>%
add_histogram(name="Histogram count") %>%
filter(is.element(Infection_Risk, outliers)) %>%
add_markers(x=~Infection_Risk,y=~zero, name="Outliers",
marker=list(symbol="diamond", size=10, line = list(color="black", width=1))) %>%
layout(title="Histogram of Infection_Risk", yaxis=list(title="Count"))
Scatter Plot
Simple scatter plot with colour
ggplot(senic_data) + geom_point(aes(x=Number_of_Nurses, y=Infection_Risk, color=Number_of_Beds)) +
ggtitle("Scatterplot of Infection_Risk vs Number_of_Nurses") +
theme(plot.title = element_text(hjust = 0.5))

Scatter Plot with Discreetization (split a variable into classes)
ggplot(olive_data) +
geom_point(aes(x = oleic, y = palmitic,
color=cut_interval(olive_data$linolenic, n = 4))) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(color = 'Linolenic range')

Scatter plot size varied
ggplot(olive_data) + geom_point(aes(x = oleic, y = palmitic, size = cut_interval(linolenic, n = 4))) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_size_manual(name = "Linolenic range", values = c(1, 2, 3, 4))

Scatter plot angle varied
# Pre-processing - Setting angle values based on category
olive_data$linolenic_class <- cut_interval(olive_data$linolenic, n = 4)
levels(olive_data$linolenic_class) <- (0:3) * (pi/4)
olive_data$linolenic_class <- as.numeric(as.character(olive_data$linolenic_class))
ggplot(olive_data, aes(x=oleic, y=palmitic)) + geom_point() +
geom_spoke(aes(angle = olive_data$linolenic_class), radius=40) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic
Legend
Orientation angle of spoke : Linolenic class
0:(0,18.5], 45:(18.5,37], 90:(37,55.5], 135:(0,18.5] ") +
theme(plot.title = element_text(hjust = 0.5))

Scatter plot with color, size and shape
ggplot(olive_data)+
geom_point(aes(x = oleic, y = eicosenoic, color = cut_interval(linoleic, n = 3),
shape = cut_interval(palmitic, n = 3),
size = cut_interval(palmitoleic, n = 3))) +
scale_size_manual(values = c(2,3,4)) +
labs(shape = "Palmitic range", color = "Linoleic range", size = "Palmitoleic range") +
ggtitle("Dependence of Oleic vs Eicosenoic vs Linoleic vs Palmitic vs Palmitoleic") +
theme(plot.title = element_text(hjust = 0.5))

Scatter Plot of MDS
baseball_scaled <- scale(baseball_data[,3:length(baseball_data)])
### Distance Matrix between rows
distance_matrix <- dist(baseball_scaled, method = "euclidean")
### Non-metric MDS
mds_result <- isoMDS(distance_matrix, k=2, p=2)
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
coords <- mds_result$points
coords_mds <- as.data.frame(coords)
baseball_data_with_mds <- baseball_data
baseball_data_with_mds$MDS_V1 <- coords_mds$V1
baseball_data_with_mds$MDS_V2 <- coords_mds$V2
plot_ly(baseball_data_with_mds, x=~MDS_V1, y=~MDS_V2) %>%
add_markers(color=~League, colors = c("blue", "red"),
text=baseball_data_with_mds$Team, hoverinfo="text") %>%
layout(title="Scatterplot of the 2 MDS variables")
Shepard Plot
Shepard plot shows the fit of MDS, the distance in original dataset vs. distance in reordered dataset should be similar
baseball_scaled <- scale(baseball_data[,3:length(baseball_data)])
### Distance Matrix between rows
distance_matrix <- dist(baseball_scaled, method = "euclidean")
mds_result <- isoMDS(distance_matrix, k=2, p=2)
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
coords <- mds_result$points
shp <- Shepard(distance_matrix, coords)
delta <- as.numeric(distance_matrix)
D <- as.numeric(dist(coords, method = "euclidean"))
n <- nrow(coords)
index <- matrix(1:n, nrow=n, ncol=n)
index1 <- as.numeric(index[lower.tri(index)])
n <- nrow(coords)
index <- matrix(1:n, nrow=n, ncol=n, byrow = T)
index2 <- as.numeric(index[lower.tri(index)])
plot_ly()%>%
add_markers(x=~delta, y=~D, name="Observation pairs", hoverinfo = 'text',
text = ~paste('Obj 1: ',
rownames(baseball_data_with_mds)[index1],
'<br> Obj 2: ',
rownames(baseball_data_with_mds)[index2])) %>%
add_lines(x=~shp$x, y=~shp$yf, name="Isotonic regression line") %>%
layout(title="Shepard's plot of MDS operation")
Dot Map Plots (World Map)
Dot Map (Map with scatter plots)
plot_mapbox(aegypti_data[aegypti_data$YEAR == 2004,], lat = ~Y, lon = ~X) %>%
add_markers(color = ~VECTOR, hoverinfo = "text",
text = ~paste(COUNTRY), alpha = 0.7) %>%
layout(title = "Dot map of mosquito distribution in the world (2004)")
Choropleth Map
Chorpleth Map with Equirectangular Projection
# Data aggregation
country_aggregate = aggregate(aegypti_data[,c("COUNTRY", "COUNTRY_ID")],
by = list(aegypti_data$COUNTRY, aegypti_data$COUNTRY_ID), FUN=length)
country_aggregate$COUNTRY = NULL
colnames(country_aggregate) = c("COUNTRY", "COUNTRY_ID", "Count")
plot_geo(country_aggregate) %>% add_trace(
z = ~Count,
text = ~COUNTRY, locations = ~COUNTRY_ID) %>%
layout(title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "equirectangular")))
Choropleth plot with Equirectangular Projection and log
plot_geo(country_aggregate) %>%
add_trace(
z = ~log(Count) ,
text = ~paste(COUNTRY, "\n Count: ", Count), locations = ~COUNTRY_ID,
hoverinfo = "text"
) %>%
layout(
title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "equirectangular")))
Choropleth plot with Conic Area Projection and log
plot_geo(country_aggregate) %>%
add_trace(
z = ~log(Count) ,
text = ~paste(COUNTRY, "\n Count: ", Count), locations = ~COUNTRY_ID
) %>%
layout(
title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "conic equal area")))
Choropleth plot using custom shape files(sf file)
swe_data = read.csv("000000KD.csv")
swe_data_processed = data.frame(region = unique(swe_data$region))
swe_data_split = split(swe_data, swe_data$age)
for (i in seq_along(swe_data_split)) {
swe_data_processed[[names(swe_data_split)[i]]] = merge(swe_data_split[[i]],
swe_data_processed$region,
by.x = 'region',
by.y = 1, all = T)$X2016
}
colnames(swe_data_processed) = c("region", "Young", "Adult", "Senior")
swe_data_processed$region = gsub(" county", "", swe_data_processed$region)
swe_data_processed$region = gsub("\\d{2} ", "", swe_data_processed$region)
swe_data_processed$region = gsub("Örebro", "Orebro", swe_data_processed$region)
rownames(swe_data_processed) = swe_data_processed$region
rds = readRDS('gadm36_SWE_1_sf.rds')
rds$Young = swe_data_processed[rds$NAME_1, "Young"]
plot_ly() %>% add_sf(data = rds, split = ~NAME_1,
color = ~Young, showlegend = F, alpha = 1) %>%
layout(title = "Choropleth plot of mean income of Young age group")